Attribute VB_Name = "Sig_CIS_ELEMS_randBackground"
Option Explicit
'* Antisense is not automatically incorporated (this version).
'* The TSS is used if possible; otherwise the ATG is used.

'* Helper class instances shared by the whole module (auto-created on first use via "As New").
'- MM   : general helpers used below (setROOT, ListFileCheck, IUPconvert, ListPosition, SpliceOutFileName)
'- Ztbl : Z-table lookup used by Z_scorePvalue; kept module-level to avoid re-initialising it per call
'- gbSQL_BC is not referenced in the part of the module reviewed here
Dim MM As New MotifMapperBasicClass
Dim gbSQL_BC As New aGB_SQL_BasicClass
Dim Ztbl As New Z_Table

'* One annotation record collected while parsing a GenBank file
'* (used as GBEntry / GBEntryPrev by the REGULATORY_REGIONS_* functions).
Type GenBankAnot
    '- Gene holds the accumulated text of the entry; the mRNA and CDS parts are cut out of it
    Gene As String
    mRNA As String
    CDS   As String
    Orient As Integer
        '0 is sense
        '1 is anti-sense
    Name As String
    '- LocusTag is filled from the locus_tag qualifier (same value as Name)
    LocusTag As String
    '- Occuppied is initialised to 0 and set to 1 once the entry is being processed
    Occuppied As Integer
End Type

'* One motif to search for.
'- elem      : the motif as read from the motif list file
'- elemIUPAC : MM.IUPconvert(elem); used as the regular-expression pattern when scanning promoters
Type dna_Motif
  elem As String
  elemIUPAC As String
End Type

'* A pair of statistics kept per motif.
'- ratio    : value derived from the number of promoters with at least one hit
'- Variance : value derived from the variance of the per-promoter hit counts
'  (depending on the array, the fields hold the value itself, a mean, or a standard deviation)
Type elem_stats
  ratio As Double
  Variance As Double
End Type

'* ---- module-level state shared by the procedures below ----
'- names of the experimental genes that were found in MASTERgeneLIST (1-indexed, see RetrievePromotersWithKeyIndex)
Dim ExpPromotersFound() As String
'- for each found gene, its position in MASTERgeneLIST; used as the index into dataSPACE
Dim ExperimentalArraySEQindex() As String
Dim ExperimentalFileAGI_listPaths() As String          '- array of .txt paths found under the experimental root; destination folders are created during the query
'- root folder entered by the user; "" means no experimental data
Dim RootFolderForExpFiles As String
'- 1 = also write the randomization index/value files
Dim SHOWrandomizationIndexes As Integer
'- per motif: running mean over all randomizations
Dim MEANsOfRandom() As elem_stats
'- per motif: values of the current experimental set
Dim STATSofExperimental() As elem_stats
'- per motif: standard deviation over all randomizations
Dim TotalRandomStdev() As elem_stats
Dim Motif_list_Obj() As dna_Motif
Dim Motif_RandPromotersHit_BinaryArray     '-- 2-D (motif, randomization): promoters-hit value of each random set
Dim Motif_RandVariance_BinaryArray         '-- 2-D (motif, randomization): variance value of each random set
'- per motif: number of promoters with at least one match in the set being scanned
Dim Promoters_w_Hits() As Double
Dim Promoters_Hits_as_String_Array() As String '-- per motif: "%"-delimited hit counts per promoter, for the variance calculation
'- result of get_random_LIST for the current randomization
Dim RandomLIST
'- "%"-delimited list of all collected gene names; initialised to "%" in Main
Dim MASTERgeneLIST As String
Dim RestrictLIST As String
'- size of each random set (= number of experimental promoters found)
Dim RAND_SIZE As Integer
Dim NUM_RANDOMIZATIONS As Integer
'- promoter sequences, indexed by position in MASTERgeneLIST
Dim dataSPACE() As String
Dim RandomPromoterArraySEQ() As String '- would hold the promoter sequences as strings; only referenced in commented-out code here
'Dim RandomPromoterArrayNAME '- would hold the promoter names as strings
Dim gbFileFolder, seqFileFolder
Dim gbFileExtension
Dim seqFileExtension

'- output root folder returned by MM.setROOT
Dim RootFolderName
'- part of RootFolderForExpFiles before its last folder (see FolderPathStem)
Dim FolderRootStem
Dim quesCONTROL As Integer
Dim OutFileRandIndex        '- text file object for tracking randomization indices
Dim OutFileRandValues
Dim OutFileExperimental
Dim fsO                     '- File System Object - initialized in Main
    

Sub Main()
  '* Program entry point.
  '*  1. asks for the run parameters (optional restrict list, motif list, GenBank and
  '*     sequence folders, number of randomizations, root folder of the experimental lists)
  '*  2. collects all promoters into dataSPACE / MASTERgeneLIST (CollectPromoters)
  '*  3. for every .txt list found under the experimental root folder: builds a random
  '*     background of the same size, scores the experimental set against it and writes
  '*     one result file below RootFolderName
  '*
  '* Changes against the previous version:
  '*  - FolderPathStem is only called when a root folder was entered
  '*    (an empty path made it fail before the "nothing to compare" case was reached)
  '*  - the randomization files are only closed when they were opened for the current
  '*    set (a set without sequences used to close files that were never opened)

  '*-LOCAL VARIABLES, frmGBSQL works in the background
   Dim i As Integer, ii As Integer
   Dim datei
   Dim dummyVariable
   Dim BeginTime
   Dim tStr As String
   Dim outFilePath As String        '- full path of the result file of the current set
   Dim setFileName As String        '- MM.SpliceOutFileName of the current list file
   Dim randFilesOpen As Boolean     '- True while OutFileRandIndex/OutFileRandValues are open

   '*-GLOBAL VARIABLES
   Set fsO = CreateObject("Scripting.FileSystemObject")

   RootFolderName = MM.setROOT("\FreqAnalysis\")
   '- create the output root folder if it does not exist yet
   If Not (fsO.folderExists(RootFolderName)) Then
    Set datei = fsO.Createfolder(RootFolderName)
   End If

   '-------------------------*-*----------------------*-*------
   '* read in the master gene list (already tested! from GenBank files)
   quesCONTROL = InputBox("Restrict data space? -use // list-" & vbCr & "[0] = no" & vbCr & "[1] = yes", "Restiction of Data Space", 0)
   RestrictLIST = ""
   If quesCONTROL = 1 Then Initilize_RestrictList

   '* read in the motifs to be searched
   Initilize_MotifList

   ReDim Promoters_w_Hits(UBound(Motif_list_Obj))
   ReDim Promoters_Hits_as_String_Array(UBound(Motif_list_Obj))
   ReDim MEANsOfRandom(UBound(Motif_list_Obj))
   ReDim TotalRandomStdev(UBound(Motif_list_Obj))
   ReDim STATSofExperimental(UBound(Motif_list_Obj))

   '* get the location of the GenBank files and their text-only sequence files
   Initilize_GBfiles

   '* input num of randomizations
   NUM_RANDOMIZATIONS = InputBox("Number of Randomizations", "Number of Randomization Cycles", 1000)

   ReDim Motif_RandPromotersHit_BinaryArray(UBound(Motif_list_Obj), NUM_RANDOMIZATIONS)
   ReDim Motif_RandVariance_BinaryArray(UBound(Motif_list_Obj), NUM_RANDOMIZATIONS)

   '* toggle SHOWrandomizationIndexes on/off
   SHOWrandomizationIndexes = InputBox("Output Randomization Indexes?" & vbCr & "[0] = no" & vbCr & "[1] = yes", "Track Indicies", 1)

   '* input the RootFolder with experimental promoter sets
   RootFolderForExpFiles = InputBox("Enter the Root Folder Path with all " & vbCr & "experimental data.", "Root Folder for Experimental Data", "")
   If RootFolderForExpFiles <> "" Then
     dummyVariable = RetrieveAllTextFilePaths(RootFolderForExpFiles, RootFolderName, 0, ExperimentalFileAGI_listPaths)
     '- only meaningful (and only safe) when a root folder was given
     FolderRootStem = FolderPathStem(RootFolderForExpFiles)
   End If

   '* collect promoters: dataSPACE gets all promoter sequences, it is processed globally
   ReDim dataSPACE(0)
   MASTERgeneLIST = "%"
   CollectPromoters

   '* nothing to compare - maybe we only want random data for analysis
   If RootFolderForExpFiles = "" Then Exit Sub

   '*----- ANALYZE EXPERIMENTAL SETS -----
   For i = 1 To UBound(ExperimentalFileAGI_listPaths) '1 index

     randFilesOpen = False

     '* get the experimental AGI gene list
     RestrictLIST = getAGIlist(ExperimentalFileAGI_listPaths(i), "Common")

     '* look the genes up; the positions of the found promoters end up in ExperimentalArraySEQindex
     ExpPromotersFound = RetrievePromotersWithKeyIndex(RestrictLIST)
     RAND_SIZE = UBound(ExpPromotersFound)

     '* establish the preliminary output file - write headers
     outFilePath = RootFolderName + Replace(ExperimentalFileAGI_listPaths(i), FolderRootStem, "") + " -" + Str(NUM_RANDOMIZATIONS) + " of" + Str(RAND_SIZE) + " on " + DateTime.Date$ + ".txt"
     Set OutFileExperimental = fsO.CreateTextFile(outFilePath, True)
     OutFileExperimental.write "OUTPUT: " + outFilePath & vbNewLine
     OutFileExperimental.write Str(UBound(ExpPromotersFound)) + " sequences found." & vbNewLine
     BeginTime = Now

     '* only analyse when something of the set was found
     If UBound(ExpPromotersFound) > 0 Then

       '* output files for the randomization model data
       If SHOWrandomizationIndexes = 1 Then
         setFileName = MM.SpliceOutFileName(ExperimentalFileAGI_listPaths(i))
         tStr = Replace(ExperimentalFileAGI_listPaths(i), FolderRootStem, "")
         tStr = Replace(tStr, setFileName + ".txt", "", , , vbBinaryCompare)
         tStr = RootFolderName + tStr + "RandomModel\"

         '- create the folder if needed
         If Not (fsO.folderExists(tStr)) Then
           Set datei = fsO.Createfolder(tStr)
         End If

         Set OutFileRandIndex = fsO.CreateTextFile(tStr + "RandomIndicies of " + Str(RAND_SIZE) + " x " + Str(NUM_RANDOMIZATIONS) + " for " + setFileName + ".txt", True)
         Set OutFileRandValues = fsO.CreateTextFile(tStr + "Set Values of " + Str(RAND_SIZE) + " x " + Str(NUM_RANDOMIZATIONS) + " for " + setFileName + ".txt", True)
         randFilesOpen = True
       End If

       '* BACKGROUND MODEL
       doSpecificRandomizationBackroundModel

       '* EXPERIMENTAL DATASET
       '* reset the two per-motif accumulators, then scan the experimental promoters
       For ii = 1 To UBound(Motif_list_Obj)
         Promoters_w_Hits(ii) = 0
         Promoters_Hits_as_String_Array(ii) = ""
       Next ii
       getHITStoArraysByIndex (ExperimentalArraySEQindex)

       '* get promoters-hit value and variance for the experimental data set
       For ii = 1 To UBound(Motif_list_Obj)
         STATSofExperimental(ii).ratio = 0
         STATSofExperimental(ii).Variance = 0
       Next ii
       processHitsExperimentSet

       '* column headers
       OutFileExperimental.write "Element" & vbTab & "Promoters w Hits" & vbTab & "Variance" & vbTab & "Z-Score(Ratio)" & vbTab & "Z-Score(Variance)" '& vbTab & "P-Value(Ratio)" & vbTab & "P-Value(Variance)"
       OutFileExperimental.write vbNewLine & vbNewLine

       '* one line per motif
       For ii = 1 To UBound(Motif_list_Obj)
         OutFileExperimental.write Motif_list_Obj(ii).elem & vbTab
         OutFileExperimental.write STATSofExperimental(ii).ratio & vbTab
         OutFileExperimental.write STATSofExperimental(ii).Variance & vbTab

         '* compute and output z-score p-values from table
         Z_scorePvalue (ii)

         '* compute p-value from population distribution
         'PopDistPvalue (ii)

         OutFileExperimental.write vbNewLine
       Next ii

       '* output promoter sequences for further analysis
       OutFileExperimental.write vbNewLine
       For ii = 1 To UBound(ExpPromotersFound)
         OutFileExperimental.write ">" + ExpPromotersFound(ii) & vbNewLine
         OutFileExperimental.write dataSPACE(ExperimentalArraySEQindex(ii)) & vbNewLine
       Next ii
       OutFileExperimental.write vbNewLine & "Calculation Time. From: " & BeginTime & " to: " & Now & vbNewLine

     End If '* something of the set was found

     '------* final calls for this set
     OutFileExperimental.Close
     If randFilesOpen Then
       OutFileRandIndex.Close
       OutFileRandValues.Close
     End If

   Next i 'process individual promoter sets

End Sub

Sub doSpecificRandomizationBackroundModel()
'* Builds the random background model for the current RAND_SIZE:
'*  - draws NUM_RANDOMIZATIONS random promoter sets from MASTERgeneLIST
'*  - scans each set for all motifs and stores the values per randomization
'*  - writes the raw values if requested (ReturnRandomValues)
'*  - finally sorts the stored values and derives the standard deviations
Dim motifIdx As Long
Dim cycle As Long

   '- the means are accumulated by processHITSforRandomSet, so start from zero
   For motifIdx = 1 To UBound(Motif_list_Obj)
     With MEANsOfRandom(motifIdx)
       .ratio = 0
       .Variance = 0
     End With
   Next motifIdx

   '-------------------------*-*----------------------*-*------
   For cycle = 1 To NUM_RANDOMIZATIONS

     '* new random list, returned as an array of list positions (mode 1)
     RandomLIST = get_random_LIST(MASTERgeneLIST, RAND_SIZE, 1)

     '* clear the two per-motif accumulators filled by the scan
     For motifIdx = 1 To UBound(Motif_list_Obj)
       Promoters_w_Hits(motifIdx) = 0
       Promoters_Hits_as_String_Array(motifIdx) = ""
     Next motifIdx

     '* scan the promoters of the current random list for all motifs
     getHITStoArraysByIndex (RandomLIST)

     '* keep the values of this randomization for each motif
     processHITSforRandomSet (cycle)

   Next cycle 'end of randomization for z-table

   '* output the random values if desired
   ReturnRandomValues
  '------------*^*---------*^*-------------

   For motifIdx = 1 To UBound(Motif_list_Obj)
     With TotalRandomStdev(motifIdx)
       .ratio = 0
       .Variance = 0
     End With
   Next motifIdx

   SortRandomMatchArrays_getSTDEV

End Sub

Function FolderPathStem(ByVal fullpath As String) As String
 '* Returns the part of a folder path that precedes its last folder, always ending in "\".
 '*   "C:\data\exp"  -> "C:\data\"
 '*   "C:\data\exp\" -> "C:\data\"
 '* Returns "" when there is nothing before the last folder (empty path, bare name,
 '* drive only); these inputs used to raise run-time error 5.
Dim sepPos As Long

 FolderPathStem = ""

 '- ignore one trailing separator
 If Right$(fullpath, 1) = "\" Then fullpath = Left$(fullpath, Len(fullpath) - 1)

 '- the stem ends at the last remaining separator (separator included)
 sepPos = InStrRev(fullpath, "\", , vbBinaryCompare)
 If sepPos = 0 Then Exit Function

 FolderPathStem = Left$(fullpath, sepPos)

End Function

Function RetrieveAllTextFilePaths(LocalRootFolder, ByVal NewPath As String, FoundFiles As Integer, PathsArray)
 '* Collects the absolute paths of all .txt files below LocalRootFolder (subfolders
 '* included) in PathsArray, appending after the current upper bound.
 '* For every folder visited, a folder of the same name is created below NewPath,
 '* so the source folder tree is mirrored at the destination.
 '*   FoundFiles = 0 marks the first call and resets PathsArray.
 '* The function itself returns nothing; the result is PathsArray.
Dim currentFolder, entry
Dim createdFolder
Dim ignored

 '- first call: start with an empty list (index 0 stays unused)
 If FoundFiles = 0 Then ReDim PathsArray(0)

 Set currentFolder = fsO.GetFolder(LocalRootFolder)  '* the current folder/subfolder

 '- mirror the current folder at the destination
 NewPath = NewPath + "\" + currentFolder.Name
 If Not fsO.folderExists(NewPath) Then Set createdFolder = fsO.Createfolder(NewPath)

 '- text files of this folder
 For Each entry In currentFolder.Files
  If UCase(fsO.GetExtensionName(entry)) = "TXT" Then
   ReDim Preserve PathsArray(UBound(PathsArray) + 1)
   PathsArray(UBound(PathsArray)) = fsO.GetAbsolutePathName(entry)
  End If
 Next entry

 '- descend into the subfolders
 For Each entry In currentFolder.subFolders
  ignored = RetrieveAllTextFilePaths(entry, NewPath, UBound(PathsArray), PathsArray)
 Next entry

End Function

Sub PopDistPvalue(motifPOSition)
'* Writes two tab-terminated p-values for one motif to OutFileExperimental, taken from
'* the position of the experimental value inside the sorted random population:
'* first for the promoters-hit value, then for the variance.
'*   found type  1 / -1 : value lies outside the population -> p-value 0
'*   found type  0      : p-value is the share of the population on the far side
'*                        of the position (which side depends on the population mean)
'*   any other found type writes nothing
Dim sorter As New MM_Sorting
Dim foundKind As Integer
Dim rankPos
Dim pVal As Double

'* ---- promoters hit ----
rankPos = sorter.SortToPositionInColumnOfBinaryArray(Motif_RandPromotersHit_BinaryArray, motifPOSition, NUM_RANDOMIZATIONS, STATSofExperimental(motifPOSition).ratio, foundKind)

Select Case foundKind
 Case 1, -1
  pVal = 0
  OutFileExperimental.write pVal & vbTab
 Case 0
  If Motif_RandPromotersHit_BinaryArray(motifPOSition, rankPos) >= MEANsOfRandom(motifPOSition).ratio Then
   pVal = (NUM_RANDOMIZATIONS - (rankPos - 1)) / NUM_RANDOMIZATIONS
  Else
   '- position value is less than the mean of the population
   pVal = (rankPos - 1) / NUM_RANDOMIZATIONS
  End If
  OutFileExperimental.write pVal & vbTab
End Select

'* ---- variance ----
rankPos = sorter.SortToPositionInColumnOfBinaryArray(Motif_RandVariance_BinaryArray, motifPOSition, NUM_RANDOMIZATIONS, STATSofExperimental(motifPOSition).Variance, foundKind)

Select Case foundKind
 Case 1, -1
  pVal = 0
  OutFileExperimental.write pVal & vbTab
 Case 0
  If Motif_RandVariance_BinaryArray(motifPOSition, rankPos) >= MEANsOfRandom(motifPOSition).Variance Then
   pVal = (NUM_RANDOMIZATIONS - (rankPos - 1)) / NUM_RANDOMIZATIONS
  Else
   '- position value is less than the mean of the population
   pVal = (rankPos - 1) / NUM_RANDOMIZATIONS
  End If
  OutFileExperimental.write pVal & vbTab
End Select

End Sub


Sub Z_scorePvalue(motifPOSition)
'* Writes two tab-terminated columns for one motif to OutFileExperimental:
'* the Z-table p-value of the promoters-hit value and of the variance, each compared
'* with the random background (mean and standard deviation).
'*  - the p-value is written negative when the experimental value is not above the
'*    background mean
'*  - "not found in background" is written when the background mean is 0
'*  - "no variation in background" is written when the background standard deviation
'*    is 0 (this case used to stop with a division-by-zero error)
'* The Ztbl class instance is declared module-level to prevent new initializations.
Dim p_value As Double
Dim ZSCORE As Double
Dim delta As Double     '- experimental value minus background mean

  '* ---- number of promoters hit ----
  If MEANsOfRandom(motifPOSition).ratio = 0 Then

   OutFileExperimental.write "not found in background" & vbTab

  ElseIf TotalRandomStdev(motifPOSition).ratio = 0 Then

   OutFileExperimental.write "no variation in background" & vbTab

  Else

   delta = STATSofExperimental(motifPOSition).ratio - MEANsOfRandom(motifPOSition).ratio
   ZSCORE = Abs(delta / TotalRandomStdev(motifPOSition).ratio)
   p_value = Ztbl.Z_Table_Pvalue(ZSCORE)

   If delta > 0 Then
    OutFileExperimental.write p_value & vbTab
   Else
    OutFileExperimental.write -(p_value) & vbTab
   End If

  End If


  '* ---- variance in promoters hit ----
  If MEANsOfRandom(motifPOSition).Variance = 0 Then

   OutFileExperimental.write "not found in background" & vbTab

  ElseIf TotalRandomStdev(motifPOSition).Variance = 0 Then

   OutFileExperimental.write "no variation in background" & vbTab

  Else

   delta = STATSofExperimental(motifPOSition).Variance - MEANsOfRandom(motifPOSition).Variance
   ZSCORE = Abs(delta / TotalRandomStdev(motifPOSition).Variance)
   p_value = Ztbl.Z_Table_Pvalue(ZSCORE)

   If delta > 0 Then
    OutFileExperimental.write p_value & vbTab
   Else
    OutFileExperimental.write -(p_value) & vbTab
   End If

  End If

End Sub

Sub SortRandomMatchArrays_getSTDEV()
'* For every motif:
'*  - computes the sample standard deviation (N-1) of the promoters-hit values and of
'*    the variance values over all randomizations -> TotalRandomStdev
'*  - sorts both value columns (needed for position look-ups in the population)
'*
'* Fixes against the previous version:
'*  - the sum of squares is reset for every motif; before, the promoters-hit sum of
'*    motif 2..n started from the variance result of the previous motif
'*  - with a single randomization the standard deviation is set to 0 instead of
'*    dividing by zero
Dim TmpArray
Dim MMsort As New MM_Sorting
Dim dummy
Dim sumSq As Double
Dim ii As Long, i As Long
Dim degreesOfFreedom As Long

degreesOfFreedom = NUM_RANDOMIZATIONS - 1

For ii = 1 To UBound(Motif_list_Obj)

 '* ---- population of promoters-hit values ----
 sumSq = 0
 ReDim TmpArray(NUM_RANDOMIZATIONS)
 For i = 1 To NUM_RANDOMIZATIONS
  TmpArray(i) = Motif_RandPromotersHit_BinaryArray(ii, i)
  sumSq = sumSq + ((TmpArray(i) - MEANsOfRandom(ii).ratio) ^ 2)
 Next i

 If degreesOfFreedom > 0 Then
  TotalRandomStdev(ii).ratio = Sqr(sumSq / degreesOfFreedom)
 Else
  TotalRandomStdev(ii).ratio = 0
 End If

 '- sort the copy and write it back
 dummy = MMsort.SelectionSort_OneforOne(TmpArray)
 For i = 1 To NUM_RANDOMIZATIONS
  Motif_RandPromotersHit_BinaryArray(ii, i) = TmpArray(i)
 Next i

 '* ---- population of variance values ----
 sumSq = 0
 ReDim TmpArray(NUM_RANDOMIZATIONS)
 For i = 1 To NUM_RANDOMIZATIONS
  TmpArray(i) = Motif_RandVariance_BinaryArray(ii, i)
  sumSq = sumSq + ((TmpArray(i) - MEANsOfRandom(ii).Variance) ^ 2)
 Next i

 If degreesOfFreedom > 0 Then
  TotalRandomStdev(ii).Variance = Sqr(sumSq / degreesOfFreedom)
 Else
  TotalRandomStdev(ii).Variance = 0
 End If

 '- sort the copy and write it back
 dummy = MMsort.SelectionSort_OneforOne(TmpArray)
 For i = 1 To NUM_RANDOMIZATIONS
  Motif_RandVariance_BinaryArray(ii, i) = TmpArray(i)
 Next i

Next ii

End Sub


Function RetrievePromotersWithKeyIndex(ByVal KeyString As String)
'* Looks up every key of a "%"-delimited key string (leading and trailing "%") in
'* MASTERgeneLIST.
'*   returns : 1-indexed String array with the keys that were found
'*   side effect : ExperimentalArraySEQindex is rebuilt and receives, at the same
'*                 index, the list position of each found key
Dim k As Long, foundCount As Long
Dim masterPos
Dim keys
Dim foundNames() As String

    '- drop the trailing "%"; the leading one leaves an empty element 0 after Split,
    '  so the keys start at index 1 (MASTERgeneLIST is 1-indexed as well)
    KeyString = Left$(KeyString, Len(KeyString) - 1)
    keys = Split(KeyString, "%", , vbBinaryCompare)

    ReDim ExperimentalArraySEQindex(0)
    ReDim foundNames(0)
    foundCount = 0

    '- MASTERgeneLIST carries all promoters and their position in dataSPACE
    For k = 1 To UBound(keys)
      masterPos = MM.ListPosition(keys(k), MASTERgeneLIST, "%", 1)
      If masterPos > 0 Then
        foundCount = foundCount + 1
        ReDim Preserve ExperimentalArraySEQindex(foundCount)
        ReDim Preserve foundNames(foundCount)
        ExperimentalArraySEQindex(foundCount) = masterPos  'the position instead of the sequence
        foundNames(foundCount) = keys(k)
      End If
    Next k

    RetrievePromotersWithKeyIndex = foundNames
End Function




Function getAGIlist(ByVal fullpath As String, IdentifierString As String)
'* Reads a tab-delimited text file and returns the values of one column as an
'* upper-cased, "%"-delimited string with leading and trailing "%" ("%" when nothing
'* was found).
'*   fullpath         : file to read
'*   IdentifierString : header text of the wanted column (compared case-insensitively)
'* The column is located by the first line that has a cell equal to IdentifierString;
'* values are taken from the lines after it. A later line with such a cell re-selects
'* the column.
'* The file is now closed before returning (it was left open before).
Dim fsOpen
Dim Flagg As Integer            '- 1 once the header line was seen
Dim i As Integer
Dim AGIlistColumn As Integer
Dim lineSPLITED, atLINE As String
Dim wantedHeader As String
Dim collected As String

    Flagg = 0
    wantedHeader = UCase(IdentifierString)
    collected = "%"

    '* open the file and get the AGI
    Set fsOpen = fsO.OpenTextFile(fullpath)

    Do Until fsOpen.atEndofStream

     atLINE = fsOpen.readLine
     lineSPLITED = Split(atLINE, vbTab, , vbBinaryCompare)

     '- data line: take the value of the selected column if the line has that column
     If Flagg = 1 And atLINE <> "" And (UBound(lineSPLITED) >= AGIlistColumn) Then
      collected = collected + UCase(lineSPLITED(AGIlistColumn)) + "%"
     End If

     '- header line: remember which column holds the identifier
     If InStr(1, UCase(atLINE), wantedHeader) > 0 Then
      For i = 0 To UBound(lineSPLITED)
       If UCase(lineSPLITED(i)) = wantedHeader Then
        AGIlistColumn = i
        Flagg = 1
        Exit For
       End If
      Next i
     End If

    Loop '- through list file

    fsOpen.Close

    getAGIlist = collected

End Function

Sub processHITSforRandomSet(RandomSet As Integer)
 '* Stores the results of one randomization for every motif:
 '*  - promoters-hit value -> Motif_RandPromotersHit_BinaryArray(motif, RandomSet)
 '*  - sample variance (N-1) of the hits per promoter
 '*                        -> Motif_RandVariance_BinaryArray(motif, RandomSet)
 '*  - both values are added, divided by NUM_RANDOMIZATIONS, to the running means
 '* Expects Promoters_w_Hits and Promoters_Hits_as_String_Array to be filled by
 '* getHITStoArraysByIndex for the current random list of RAND_SIZE promoters.
 '*
 '* Fixes against the previous version:
 '*  - the variance accumulator is reset for every motif (it carried over from the
 '*    previous motif before; processHitsExperimentSet already reset it)
 '*  - the hit sum is a Long, an Integer could overflow for frequent motifs
 '*  - a random set of size 1 gets variance 0 instead of a division by zero
 Dim local_hit_per_promoter   '* array for the hits
 Dim i As Long, ii As Long
 Dim value_SUM As Long
 Dim Mean As Double
 Dim Variance As Double

 For i = 1 To UBound(Motif_list_Obj)

   Motif_RandPromotersHit_BinaryArray(i, RandomSet) = Promoters_w_Hits(i)

   MEANsOfRandom(i).ratio = MEANsOfRandom(i).ratio + (Promoters_w_Hits(i) / NUM_RANDOMIZATIONS)

   '* chop off the last %
   Promoters_Hits_as_String_Array(i) = Mid(Promoters_Hits_as_String_Array(i), 1, Len(Promoters_Hits_as_String_Array(i)) - 1)
   local_hit_per_promoter = Split(Promoters_Hits_as_String_Array(i), "%", , vbBinaryCompare)

   value_SUM = 0
   For ii = 0 To UBound(local_hit_per_promoter)
      value_SUM = value_SUM + CLng(local_hit_per_promoter(ii))
   Next ii
   Mean = value_SUM / RAND_SIZE

   '* sum of squared deviations; the STDEV would be the square root of the variance
   Variance = 0
   For ii = 0 To UBound(local_hit_per_promoter)
    Variance = Variance + ((CLng(local_hit_per_promoter(ii)) - Mean) ^ 2)
   Next ii
   If RAND_SIZE > 1 Then Variance = Variance / (RAND_SIZE - 1)

   Motif_RandVariance_BinaryArray(i, RandomSet) = Variance

   MEANsOfRandom(i).Variance = MEANsOfRandom(i).Variance + (Variance / NUM_RANDOMIZATIONS)

 Next i

End Sub

Sub processHitsExperimentSet()
 '* Fills STATSofExperimental for every motif from the scan of the experimental set:
 '*  - ratio    = number of promoters with at least one hit (Promoters_w_Hits)
 '*  - Variance = sample variance (N-1) of the hits per promoter; for a single
 '*               promoter the sum of squared deviations is kept as is (it is 0)
 '* Expects Promoters_w_Hits and Promoters_Hits_as_String_Array to be filled by
 '* getHITStoArraysByIndex.
 '* The hit sum is a Long now; an Integer could overflow for frequent motifs.
 Dim local_hit_per_promoter   '* array for the hits
 Dim i As Long, ii As Long
 Dim lastIdx As Long
 Dim value_SUM As Long
 Dim Mean As Double
 Dim Variance As Double

 For i = 1 To UBound(Motif_list_Obj)

   STATSofExperimental(i).ratio = Promoters_w_Hits(i)

   '* chop off the last %
   Promoters_Hits_as_String_Array(i) = Mid(Promoters_Hits_as_String_Array(i), 1, Len(Promoters_Hits_as_String_Array(i)) - 1)
   local_hit_per_promoter = Split(Promoters_Hits_as_String_Array(i), "%", , vbBinaryCompare)
   lastIdx = UBound(local_hit_per_promoter)

   value_SUM = 0
   For ii = 0 To lastIdx
      value_SUM = value_SUM + CLng(local_hit_per_promoter(ii))
   Next ii
   Mean = value_SUM / (lastIdx + 1)

   '* sum of squared deviations; the STDEV would be the square root of the variance
   Variance = 0
   For ii = 0 To lastIdx
    Variance = Variance + ((CLng(local_hit_per_promoter(ii)) - Mean) ^ 2)
   Next ii
   '- lastIdx equals N-1 because the array is 0-based
   If lastIdx <> 0 Then Variance = Variance / lastIdx

   STATSofExperimental(i).Variance = Variance
 Next i

End Sub


Sub ReturnRandomValues()
'* Writes the stored values of all randomizations to OutFileRandValues when
'* SHOWrandomizationIndexes is 1; does nothing otherwise.
'*   line 1      : every motif name twice (one column pair per motif)
'*   other lines : one line per randomization with the promoters-hit value and the
'*                 variance value of every motif, all tab-terminated
Dim motifIdx As Long, cycle As Long

If SHOWrandomizationIndexes <> 1 Then Exit Sub

'- header line
For motifIdx = 1 To UBound(Motif_list_Obj)
 OutFileRandValues.write Motif_list_Obj(motifIdx).elem & vbTab & Motif_list_Obj(motifIdx).elem & vbTab
Next motifIdx
OutFileRandValues.write vbNewLine

'- one line per randomization
For cycle = 1 To NUM_RANDOMIZATIONS
 For motifIdx = 1 To UBound(Motif_list_Obj)
  OutFileRandValues.write Motif_RandPromotersHit_BinaryArray(motifIdx, cycle) & vbTab & Motif_RandVariance_BinaryArray(motifIdx, cycle) & vbTab
 Next motifIdx
 OutFileRandValues.write vbNewLine
Next cycle

End Sub


Sub getHITStoArraysByIndex(PromoterArraySeqIndex)
'* Scans the promoters dataSPACE(PromoterArraySeqIndex(1..n)) for every motif and
'* adds the results to two per-motif accumulators (the caller resets them first):
'*  - Promoters_w_Hits(motif)               + 1 for every promoter with a match
'*  - Promoters_Hits_as_String_Array(motif) + "<hits>%" for every promoter, in
'*                                            promoter order
'* The motifs are the outer loop now, so the pattern is assigned once per motif
'* instead of once per promoter/motif pair; the accumulated values are unchanged.
Dim motifIdx As Long, promIdx As Long
Dim hitCount As Long
'* regular expression object from VBscript 5.0 and later
Dim reGGExp
    Set reGGExp = New RegExp
Dim r1Matches

'- all matches, case-insensitive
reGGExp.Global = True
reGGExp.IgnoreCase = True

For motifIdx = 1 To UBound(Motif_list_Obj)

 reGGExp.Pattern = Motif_list_Obj(motifIdx).elemIUPAC

 For promIdx = 1 To UBound(PromoterArraySeqIndex)

  Set r1Matches = reGGExp.Execute(dataSPACE(PromoterArraySeqIndex(promIdx)))
  hitCount = r1Matches.Count

  Promoters_Hits_as_String_Array(motifIdx) = Promoters_Hits_as_String_Array(motifIdx) + Str(hitCount) & "%"

  If hitCount > 0 Then Promoters_w_Hits(motifIdx) = Promoters_w_Hits(motifIdx) + 1

 Next promIdx
Next motifIdx

End Sub


Sub CollectPromoters()
'* Cycles through the GenBank files of gbFileFolder (files with the extension
'* gbFileExtension) and hands each one, together with its sequence file, to
'* REGULATORY_REGIONS_TAIRv7, which fills dataSPACE.
'* The sequence file of "<name>" is expected as
'* seqFileFolder\<name>.<seqFileExtension> (the GenBank extension is kept).
'* A dry run checks all sequence files first; the program ends when one is missing.
'* fsO is set in Main and is global.

 Dim missingSeqFile As Boolean
 Dim gbFile, gbFiles
 Dim mapStream
 Dim seqFile
 Dim ignored
 Dim driveObj

    '- a two-character folder value is treated as a drive ("C:")
    If Len(gbFileFolder) = 2 Then
     Set driveObj = fsO.getdrive(gbFileFolder)
     Set gbFiles = driveObj.RootFolder.Files
    Else
     Set gbFiles = gbFileFolder.Files
    End If

    '* dry run: report every GenBank file without sequence file
    missingSeqFile = False
    For Each gbFile In gbFiles
     If UCase(fsO.GetExtensionName(gbFile.Name)) = UCase(gbFileExtension) Then
      If Not fsO.FileExists(seqFileFolder + "\" + gbFile.Name + "." + seqFileExtension) Then
       MsgBox (gbFile.Name & "seq not found!!"), vbCritical, "ERROR"
       missingSeqFile = True
      End If
     End If
    Next gbFile
    If missingSeqFile Then End

    '* real run
    For Each gbFile In gbFiles
     If UCase(fsO.GetExtensionName(gbFile.Name)) = UCase(gbFileExtension) Then

      '- annotation file as text stream, sequence file as file object
      Set mapStream = fsO.OpenTextFile(gbFileFolder + "\" + gbFile.Name)
      Set seqFile = fsO.GetFile(seqFileFolder + "\" + gbFile.Name + "." + seqFileExtension)

      '* HERE - specifies which version of the extraction to use
      ignored = REGULATORY_REGIONS_TAIRv7(mapStream, seqFile, dataSPACE, RestrictLIST)

     End If
    Next gbFile

End Sub

Sub Initilize_RestrictList()
 '* Loads the restrict list (MM.ListFileCheck(2)) into RestrictLIST as an
 '* upper-cased, "%"-joined string with a trailing "%" - the form used for the
 '* sequence comparisons.
Dim restrictNames

restrictNames = MM.ListFileCheck(2)
RestrictLIST = UCase(Join(restrictNames, "%") + "%")

End Sub

Sub Initilize_MotifList()
'* Loads the motif list (MM.ListFileCheck(1)) into Motif_list_Obj: every entry from
'* index 1 on keeps the motif as read (elem) and its MM.IUPconvert form (elemIUPAC).
Dim motifNames
Dim idx As Long

    motifNames = MM.ListFileCheck(1)

    ReDim Motif_list_Obj(UBound(motifNames))
    For idx = 1 To UBound(motifNames)
     With Motif_list_Obj(idx)
      .elem = motifNames(idx)
      .elemIUPAC = MM.IUPconvert(motifNames(idx))
     End With
    Next idx

End Sub

Sub Initilize_GBfiles()
'* Asks the user for the GenBank folder, the raw sequence folder and the two file
'* extensions, and stores them in gbFileFolder, seqFileFolder, gbFileExtension and
'* seqFileExtension.
'* The program ends (End) when a folder prompt is answered with "", "q"/"Q" or a value
'* equal to vbCancel, or when an extension is left empty.

'- GenBank folder: the entered text is replaced by the Folder object of that path
gbFileFolder = InputBox("Enter which FOLDER that contains the GenBank files.", "Folder with GenBank Files", "")
If gbFileFolder = vbCancel Or UCase(gbFileFolder) = "Q" Or gbFileFolder = "" Then End
Set gbFileFolder = fsO.GetFolder(gbFileFolder)
'- strips the last character when the path contains a "/" or ends with "\"
'  ("=" binds tighter than "Or", so only the "\" test is compared with the length)
'- NOTE(review): presumably meant for a drive root such as "C:\", which CollectPromoters
'  handles through its Len = 2 branch; the assignment is made without Set on a variable
'  that holds a Folder object - confirm it behaves as intended
If InStrRev(gbFileFolder, "/") Or InStrRev(gbFileFolder, "\") = Len(gbFileFolder) Then gbFileFolder = Mid(gbFileFolder, 1, Len(gbFileFolder) - 1)

'- sequence folder: same handling as above
seqFileFolder = InputBox("Enter which FOLDER that contains the RAW sequence files.", "Folder with Sequence Files", "")
If seqFileFolder = vbCancel Or UCase(seqFileFolder) = "Q" Or seqFileFolder = "" Then End
Set seqFileFolder = fsO.GetFolder(seqFileFolder)
If InStrRev(seqFileFolder, "/") Or InStrRev(seqFileFolder, "\") = Len(seqFileFolder) Then seqFileFolder = Mid(seqFileFolder, 1, Len(seqFileFolder) - 1)

'- extension of the GenBank files (default "gbk")
gbFileExtension = InputBox("Common File Extension, data-type. GenBank file", "Just in case you changes it", "gbk")
If gbFileExtension = "" Then End    'terminate program on error
     
'- extension of the sequence files (default "txt")
seqFileExtension = InputBox("Common File Extension, data-type. Seq.file only", "Just in case you changes it", "txt")
If seqFileExtension = "" Then End    'terminate program on error


End Sub

Function get_random_LIST(ByVal INPUTList, RAND_list_size As Integer, StringORarray As Integer)
'* Draws RAND_list_size different entries at random (without replacement) from a
'* "%"-delimited list with leading and trailing "%".
'*   INPUTList      : the list, e.g. "%A%B%C%"
'*   RAND_list_size : number of entries to draw (at least one entry is always drawn)
'*   StringORarray  : 0 = return the drawn entries as "%"-joined string with trailing "%"
'*                    1 = return a 1-indexed String array with the list positions
'* When SHOWrandomizationIndexes is 1, the drawn positions are written to
'* OutFileRandIndex (one line per call) so the randomization can be confirmed.
'*
'* Changes against the previous version:
'*  - raises an error when more entries are requested than the list holds, or when
'*    the list is empty (the draw loop never ended / failed on the first index)
'*  - list size and index are Long (an Integer overflowed above 32767 entries)
'*  - Randomize is called once per call instead of before every single draw

Dim valid_Counts As Long
Dim outARRAY() As String
Dim INPUTcopy '-left as variant to take in the array
Dim listMAX As Long
Dim i As Long
Dim padded As String

'* the list is split after removing the lead/trail delimiters; the ZEROBUFFER entry
'* occupies index 0, which is ignored by the randomization protocol
padded = "%ZEROBUFFER" & INPUTList
padded = Mid$(padded, 2, Len(padded) - 2)
INPUTcopy = Split(padded, "%", , vbBinaryCompare)

listMAX = UBound(INPUTcopy)

If listMAX < 1 Or RAND_list_size > listMAX Then
 Err.Raise vbObjectError + 513, "get_random_LIST", "Cannot draw " & RAND_list_size & " entries from a list of " & listMAX & " entries."
End If

valid_Counts = 0

'- seed the random-number generator from the system timer
Randomize

Do
 ' Generate random position between 1 and listMAX.
 i = Int(listMAX * Rnd) + 1

 '- positions that were drawn already are skipped
 If INPUTcopy(i) <> "#removed#" Then
   valid_Counts = valid_Counts + 1
   ReDim Preserve outARRAY(valid_Counts)

   If StringORarray = 0 Then
    outARRAY(valid_Counts) = INPUTcopy(i)
   ElseIf StringORarray = 1 Then
    outARRAY(valid_Counts) = i
   End If

   INPUTcopy(i) = "#removed#"

   If SHOWrandomizationIndexes = 1 Then OutFileRandIndex.write i & " "
 End If

Loop While valid_Counts < RAND_list_size

'* exit calls
If SHOWrandomizationIndexes = 1 Then OutFileRandIndex.write vbNewLine

If StringORarray = 0 Then
 get_random_LIST = Join(outARRAY, "%") + "%"   '* return properly delimited list
ElseIf StringORarray = 1 Then
 get_random_LIST = outARRAY
End If
End Function

'*******************************************************************************
'* REGULATORY_REGIONS_TAIRv6
'*
'* Walks a GenBank-style annotation stream line by line, collects the text of
'* each "gene" entry (up to the next "gene" line or the "ORIGIN" line), derives
'* the entry's mRNA / CDS borders through gbSQL_BC.prePARSER + gbSQL_BC.SGBA and
'* reads the matching regulatory region out of the sequence file.
'*
'* Parameters
'*   xMapFileObj  - object exposing .readLine and .atEndofStream
'*                  (presumably a FileSystemObject TextStream - confirm at caller)
'*   xSeqFilePath - path of the sequence file; opened "For Binary" and read with
'*                  Get at the computed positions
'*   xOutSeqArr   - array that receives one sequence per accepted gene; it is
'*                  grown with ReDim Preserve, so it must already be dimensioned
'*   vListArray   - name list handed to MM.InList (delimiter "%") when
'*                  frmGB_SQL.chSeqList is checked
'*
'* Returns
'*   Always "" - the results are delivered through xOutSeqArr and the
'*   module-level MASTERgeneLIST ("%"-delimited, upper-cased locus tags).
'*
'* Side effects
'*   Forces frmGB_SQL to: chkTStart = True, optUpstream = True,
'*   txtBegin = "-1500", txtEnd = "0", and chSeqList = True when RestrictLIST
'*   is not empty.
'*
'* Behaviour notes
'*   - Sequences of antisense ("complement") genes are stored as MM.RevComp().
'*   - Feature presence is tested with explicit "> 0" comparisons; combining raw
'*     InStr positions with And is a bitwise operation and can yield 0 even when
'*     both features are present.
'*   - An entry whose CDS feature precedes its mRNA feature is skipped (same
'*     rule as REGULATORY_REGIONS_TAIRv7) instead of calling Mid with a
'*     negative length.
'*   - The sequence file is closed on every exit path; run-time errors are
'*     re-raised to the caller after the file has been closed.
'*******************************************************************************
Function REGULATORY_REGIONS_TAIRv6(ByRef xMapFileObj, ByRef xSeqFilePath, ByRef xOutSeqArr, ByRef vListArray)

    '* feature keys as they appear in the annotation text
    Const FEAT_GENE As String = "     gene  "
    Const FEAT_MRNA As String = "     mRNA  "
    Const FEAT_CDS As String = "     CDS  "
    Const TAG_ORIGIN As String = "ORIGIN"

    Dim FileNum
    Dim Lread
    '* Lb* = borders taken from the mRNA (or gene) feature, St* = borders taken
    '* from the CDS (or gene) feature; PRE = previous entry, CUR = current entry
    Dim Lb5pPRE, Lb3pPRE, St5pPRE, St3pPRE
    Dim Lb5pCUR, Lb3pCUR, St5pCUR, St3pCUR
    Dim Upstm, Dwstm
    Dim LbSeqAr As String
    Dim GenFlag
    Dim GBEntry As GenBankAnot          '- UDT members start out as "" / 0
    Dim GBEntryPrev As GenBankAnot
    Dim ExonArray() As Long
    Dim RNA_Array() As Long
    Dim reGGExp
    Dim r1Matches
    Dim posMRNA As Long
    Dim posCDS As Long
    Dim offBegin As Long
    Dim offEnd As Long
    Dim seqLen As Long
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    GenFlag = 0
    LbSeqAr = ""

    FileNum = FreeFile
    Open xSeqFilePath For Binary As #FileNum
    '* from here on the handle must be released even when something fails
    On Error GoTo CloseOnError

    '* regular expression object from VBscript 5.0 and later
    Set reGGExp = New RegExp

    '* CHECK - the extraction mode is fixed here instead of being taken from the form
    If RestrictLIST <> "" Then frmGB_SQL.chSeqList.value = True
    '- otherwise we extract the entire data space
    frmGB_SQL.chkTStart.value = True
    frmGB_SQL.optUpstream.value = True
    frmGB_SQL.txtBegin.value = "-1500"
    frmGB_SQL.txtEnd.value = "0"

    '* the offsets and the file length do not change while the loop runs
    offBegin = CLng(Val(frmGB_SQL.txtBegin.value))
    offEnd = CLng(Val(frmGB_SQL.txtEnd.value))
    seqLen = LOF(FileNum)

    '---#MAIN LOOP of GenBank file
    Do While Not xMapFileObj.atEndofStream

        Lread = xMapFileObj.readLine

        '* a new "gene" line (or ORIGIN) closes the entry collected so far;
        '* GenFlag = 1 prevents a first, empty match
        If GenFlag = 1 And (InStr(1, Lread, FEAT_GENE) > 0 Or InStr(1, Lread, TAG_ORIGIN) > 0) Then

            GBEntry.Occuppied = 1

            posMRNA = InStr(1, GBEntry.Gene, FEAT_MRNA)
            posCDS = InStr(1, GBEntry.Gene, FEAT_CDS)

            If posMRNA > 0 And posCDS > 0 Then
                If posMRNA < posCDS Then
                    GBEntry.mRNA = Mid(GBEntry.Gene, posMRNA, posCDS - posMRNA + 1)
                Else
                    '* CDS listed before mRNA: the slice length would be negative
                    GoTo ProcessFinished
                End If
            ElseIf posMRNA > 0 Then
                GBEntry.mRNA = Mid(GBEntry.Gene, posMRNA, Len(GBEntry.Gene) - posMRNA + 1)
            End If

            If posCDS > 0 Then GBEntry.CDS = Mid(GBEntry.Gene, posCDS, Len(GBEntry.Gene) - posCDS + 1)

            '* NOTE(review): when no mRNA feature exists posMRNA is 0 and the entry
            '* text is cut down to its first character - confirm this is intended.
            If frmGB_SQL.chkToggleGene.value Then GBEntry.Gene = Mid(GBEntry.Gene, 1, posMRNA + 1)

            '* tag extraction from v3.6.0 ("locus_tag: xxx;" form)
            If InStr(1, GBEntry.Gene, "locus_tag: ") > 0 Then
                reGGExp.Pattern = "locus_tag: ([\S\w]+);" '*only for TAIRv6 Arabidopsis
                reGGExp.Global = True
                reGGExp.IgnoreCase = True
                Set r1Matches = reGGExp.Execute(GBEntry.Gene)
                '* the tag text may be present without matching the full pattern
                If r1Matches.Count > 0 Then
                    GBEntry.Name = r1Matches.Item(0).SubMatches(0)
                    GBEntry.LocusTag = GBEntry.Name
                End If
            End If

            '* tag extraction (locus_tag="xxx" form)
            If InStr(1, GBEntry.Gene, "locus_tag=") > 0 Then
                reGGExp.Pattern = "locus_tag=\""([ \S\w]+)\"""
                reGGExp.Global = True
                reGGExp.IgnoreCase = True
                Set r1Matches = reGGExp.Execute(GBEntry.Gene)
                If r1Matches.Count > 0 Then
                    GBEntry.Name = r1Matches.Item(0).SubMatches(0)
                    GBEntry.LocusTag = GBEntry.Name
                End If
            End If

        End If

        If GBEntry.Occuppied <> 0 Then

            '* skip "redundant" entries (same tag and name as the previous entry,
            '* e.g. alternative transcripts)
            If GBEntry.LocusTag = GBEntryPrev.LocusTag And GBEntry.Name = GBEntryPrev.Name Then
                GoTo ProcessFinished
            End If

            '* ---- borders of the CURRENT entry ------------------------------
            If InStr(1, GBEntry.Gene, FEAT_CDS) = 0 Then
                '* no CDS in the entry text: use the gene borders for both pairs
                RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
                ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
                Lb5pCUR = RNA_Array(1)
                Lb3pCUR = RNA_Array(UBound(RNA_Array))
                St5pCUR = ExonArray(1)
                St3pCUR = ExonArray(UBound(ExonArray))

            ElseIf frmGB_SQL.chkToggleGene.value Then
                '* NOTE(review): the arrays are parsed but the *CUR borders are not
                '* refreshed in this branch, so the values of the preceding entry
                '* stay in effect - confirm whether that is intended.
                RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
                ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))

            Else
                If GBEntry.mRNA <> "" Then
                    RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.mRNA, "mRNA  "))
                    Lb5pCUR = RNA_Array(1)
                    Lb3pCUR = RNA_Array(UBound(RNA_Array))
                End If

                '* the CDS is REQUIRED by this script
                ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.CDS, "CDS  "))
                St5pCUR = ExonArray(1)
                St3pCUR = ExonArray(UBound(ExonArray))

                If GBEntry.mRNA = "" Then
                    '* no transcript annotated: fall back to the CDS borders
                    RNA_Array = ExonArray
                    Lb5pCUR = St5pCUR
                    Lb3pCUR = St3pCUR
                End If
            End If

            '* ---- borders of the PREVIOUS entry (if one has been logged) ----
            If GBEntryPrev.Gene <> "" Then
                If InStr(1, GBEntryPrev.Gene, FEAT_CDS) = 0 Then
                    RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.Gene, "gene  "))
                    ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.Gene, "gene  "))
                    Lb5pPRE = RNA_Array(1)
                    Lb3pPRE = RNA_Array(UBound(RNA_Array))
                    St5pPRE = ExonArray(1)
                    St3pPRE = ExonArray(UBound(ExonArray))
                Else
                    If GBEntryPrev.mRNA <> "" Then
                        If frmGB_SQL.chkToggleGene.value Then
                            RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.Gene, "gene  "))
                        Else
                            RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.mRNA, "mRNA  "))
                        End If
                        Lb5pPRE = RNA_Array(1)
                        Lb3pPRE = RNA_Array(UBound(RNA_Array))
                    End If

                    If GBEntryPrev.CDS <> "" Then
                        ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.CDS, "CDS  "))
                        St5pPRE = ExonArray(1)
                        St3pPRE = ExonArray(UBound(ExonArray))

                        If GBEntryPrev.mRNA = "" Then
                            RNA_Array = ExonArray
                            Lb5pPRE = St5pPRE
                            Lb3pPRE = St3pPRE
                        End If
                    End If
                End If
            End If

            '* ================================================================
            '* CASE 1: first gene found, previous entry empty, orientation sense
            '* ================================================================
            If GBEntry.Gene <> "" And GBEntryPrev.Gene = "" And GBEntry.Orient = 0 Then

                '* only CDS-carrying genes are written out
                If GBEntry.CDS = "" Then GoTo ProcessFinished
                If Not rrV6_EntryAccepted(GBEntry, vListArray) Then GoTo ProcessFinished

                If frmGB_SQL.chkTStart.value Then
                    Dwstm = Lb5pCUR + offEnd
                    If frmGB_SQL.chkTO_NEXT_GENE.value Then
                        Upstm = 1
                    Else
                        Upstm = Lb5pCUR + offBegin
                    End If

                ElseIf frmGB_SQL.chkTstop.value Then
                    Upstm = Lb3pCUR + offBegin
                    Dwstm = Lb3pCUR + offEnd

                ElseIf frmGB_SQL.chkATG.value Then
                    Dwstm = St5pCUR + offEnd
                    If frmGB_SQL.chkTO_NEXT_GENE.value Then
                        Upstm = 1
                    Else
                        Upstm = St5pCUR + offBegin
                    End If

                ElseIf frmGB_SQL.chkSTOP.value Then
                    Upstm = St3pCUR + offBegin
                    Dwstm = St3pCUR + offEnd
                End If

                If frmGB_SQL.optUpstream.value Then
                    If Upstm < 1 Then Upstm = 1
                    LbSeqAr = rrV6_ReadSpan(FileNum, Upstm, Dwstm - Upstm)
                End If

                If frmGB_SQL.optDownstream.value Then
                    If Dwstm > seqLen Then Dwstm = seqLen
                    LbSeqAr = rrV6_ReadSpan(FileNum, Upstm + 1, Dwstm - Upstm)
                End If

                If LbSeqAr <> "" Then
                    '* orientation is sense, write out the current gene
                    MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntry.LocusTag) + "%"
                    ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
                    xOutSeqArr(UBound(xOutSeqArr)) = LbSeqAr
                End If

            '* ================================================================
            '* CASE 2: both entries filled - intergenic region between two genes
            '* ================================================================
            ElseIf GBEntry.Gene <> "" And GBEntryPrev.Gene <> "" Then

                '* ---- previous entry is ANTISENSE: its promoter lies behind it
                If GBEntryPrev.Orient = 1 Then

                    If GBEntryPrev.CDS = "" Then GoTo ProcessNextinLine
                    If Not rrV6_EntryAccepted(GBEntryPrev, vListArray) Then GoTo ProcessNextinLine

                    '* output is restricted to non-overlapping neighbours
                    If (Lb5pCUR > Lb3pPRE) And (St5pCUR > St3pPRE) Then

                        If frmGB_SQL.chkTStart.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = Lb5pCUR
                                Upstm = Lb3pPRE
                            Else
                                Dwstm = Lb3pPRE - offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
                                    If Dwstm > Lb5pCUR Then Dwstm = Lb5pCUR
                                End If
                                Upstm = Lb3pPRE - offEnd
                            End If

                        ElseIf frmGB_SQL.chkTstop.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = Lb5pCUR
                                Upstm = Lb5pPRE
                            Else
                                Dwstm = Lb5pPRE - offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
                                    If Dwstm > Lb5pCUR Then Dwstm = Lb5pCUR
                                End If
                                Upstm = Lb5pPRE - offEnd
                            End If

                        ElseIf frmGB_SQL.chkATG.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = St5pCUR
                                Upstm = St3pPRE
                            Else
                                Dwstm = St3pPRE - offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next ATG/STOP
                                    If Dwstm > St5pCUR Then Dwstm = St5pCUR
                                End If
                                Upstm = St3pPRE - offEnd
                            End If

                        ElseIf frmGB_SQL.chkSTOP.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = St5pCUR
                                Upstm = St5pPRE
                            Else
                                Dwstm = St5pPRE - offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next ATG/STOP
                                    If Dwstm > St5pCUR Then Dwstm = St5pCUR
                                End If
                                Upstm = St5pPRE - offEnd
                            End If
                        End If

                        If frmGB_SQL.optUpstream.value Then
                            If Dwstm > seqLen Then Dwstm = seqLen
                            LbSeqAr = rrV6_ReadSpan(FileNum, Upstm + 1, Dwstm - Upstm)
                        End If

                        If frmGB_SQL.optDownstream.value Then
                            If Upstm < 1 Then Upstm = 1
                            LbSeqAr = rrV6_ReadSpan(FileNum, Upstm, Dwstm - Upstm)
                        End If

                        If LbSeqAr <> "" Then
                            '* PREVIOUS is antisense: store the reverse complement
                            MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntryPrev.LocusTag) + "%"
                            ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
                            xOutSeqArr(UBound(xOutSeqArr)) = MM.RevComp(LbSeqAr)
                        End If

                    End If '- the previous is upstream of the current gene
                End If 'previous antisense oriented

ProcessNextinLine:
                '* ---- current entry is SENSE: its promoter lies in front of it
                If GBEntry.Orient = 0 Then

                    If GBEntry.CDS = "" Then GoTo ProcessFinished
                    If Not rrV6_EntryAccepted(GBEntry, vListArray) Then GoTo ProcessFinished

                    '* again restricted to non-overlapping neighbours
                    If (Lb5pCUR > Lb3pPRE) And (St5pCUR > St3pPRE) Then

                        If frmGB_SQL.chkTStart.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = Lb5pCUR
                                Upstm = Lb3pPRE
                            Else
                                Upstm = Lb5pCUR + offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
                                    If Upstm < Lb3pPRE Then Upstm = Lb3pPRE
                                End If
                                Dwstm = Lb5pCUR + offEnd
                            End If

                        ElseIf frmGB_SQL.chkTstop.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = Lb3pCUR
                                Upstm = Lb3pPRE
                            Else
                                Upstm = Lb3pCUR + offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
                                    If Upstm < Lb3pPRE Then Upstm = Lb3pPRE
                                End If
                                Dwstm = Lb3pCUR + offEnd
                            End If

                        ElseIf frmGB_SQL.chkATG.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = St5pCUR
                                Upstm = St3pPRE
                            Else
                                Upstm = St5pCUR + offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
                                    If Upstm < St3pPRE Then Upstm = St3pPRE
                                End If
                                Dwstm = St5pCUR + offEnd
                            End If

                        ElseIf frmGB_SQL.chkSTOP.value Then
                            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                                Dwstm = St3pCUR
                                Upstm = St3pPRE
                            Else
                                Upstm = St3pCUR + offBegin
                                If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
                                    If Upstm < St3pPRE Then Upstm = St3pPRE
                                End If
                                Dwstm = St3pCUR + offEnd
                            End If
                        End If

                        If frmGB_SQL.optUpstream.value Then
                            If Upstm < 1 Then Upstm = 1
                            LbSeqAr = rrV6_ReadSpan(FileNum, Upstm, Dwstm - Upstm)
                        End If

                        If frmGB_SQL.optDownstream.value Then
                            If Dwstm > seqLen Then Dwstm = seqLen
                            LbSeqAr = rrV6_ReadSpan(FileNum, Upstm + 1, Dwstm - Upstm)
                        End If

                        If LbSeqAr <> "" Then
                            '* current is sense, save it as read
                            MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntry.LocusTag) + "%"
                            ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
                            xOutSeqArr(UBound(xOutSeqArr)) = LbSeqAr
                        End If

                        GoTo ProcessFinished
                    End If
                End If 'current sense oriented

            End If

            '* EXIT of the per-entry work: shift current -> previous and reset
ProcessFinished:
            '* on the ORIGIN line the last entry is kept for the final call below
            If InStr(1, Lread, TAG_ORIGIN) < 1 Then
                GBEntryPrev = GBEntry

                GenFlag = 0
                LbSeqAr = ""
                GBEntry.Gene = ""
                GBEntry.CDS = ""
                GBEntry.mRNA = ""
                GBEntry.Name = ""
                GBEntry.LocusTag = ""
                GBEntry.Orient = 0
                GBEntry.Occuppied = 0
            End If

        End If '- occupied

        '* keep collecting the lines that belong to the open gene entry
        If GenFlag = 1 Then
            GBEntry.Gene = GBEntry.Gene + Lread + vbCr
        End If

        '* a "gene" line opens a new entry; "complement" marks antisense
        If InStr(1, Lread, FEAT_GENE) > 0 Then
            GBEntry.Gene = Lread + vbCr
            GenFlag = 1
            If InStr(1, Lread, "complement") > 0 Then
                GBEntry.Orient = 1
            Else
                GBEntry.Orient = 0
            End If
        End If

        '* the annotation part ends with the ORIGIN line
        If InStr(1, Lread, TAG_ORIGIN) > 0 Then Exit Do

    Loop

    '* ====================================================================
    '* LAST GENE CALL - only used when the last gene is antisense; sense
    '* genes were already handled as the "current" member of a pair above
    '* ====================================================================
    If GBEntryPrev.Gene <> "" And GBEntry.Orient = 1 Then

        If Not rrV6_EntryAccepted(GBEntry, vListArray) Then GoTo FinalGENE_noMATCH

        If frmGB_SQL.chkTStart.value Then
            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                Dwstm = seqLen
            Else
                Dwstm = Lb3pCUR - offBegin
            End If
            Upstm = Lb3pCUR - offEnd

        ElseIf frmGB_SQL.chkTstop.value Then
            Dwstm = Lb5pCUR - offBegin
            Upstm = Lb5pCUR - offEnd

        ElseIf frmGB_SQL.chkATG.value Then
            If frmGB_SQL.chkTO_NEXT_GENE.value Then
                Dwstm = seqLen
            Else
                Dwstm = St3pCUR - offBegin
            End If
            Upstm = St3pCUR - offEnd

        ElseIf frmGB_SQL.chkSTOP.value Then
            Dwstm = St5pCUR - offBegin
            Upstm = St5pCUR - offEnd
        End If

        If frmGB_SQL.optUpstream.value Then
            If Dwstm > seqLen Then Dwstm = seqLen
            LbSeqAr = rrV6_ReadSpan(FileNum, Upstm + 1, Dwstm - Upstm)
        End If

        If frmGB_SQL.optDownstream.value Then
            If Upstm < 1 Then Upstm = 1
            LbSeqAr = rrV6_ReadSpan(FileNum, Upstm, Dwstm - Upstm)
        End If

        If LbSeqAr <> "" Then
            '* last gene is antisense: store the reverse complement
            MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntry.LocusTag) + "%"
            ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
            xOutSeqArr(UBound(xOutSeqArr)) = MM.RevComp(LbSeqAr)
        End If

    End If

FinalGENE_noMATCH:
    Close #FileNum
    REGULATORY_REGIONS_TAIRv6 = ""
    Exit Function

CloseOnError:
    '* release the sequence file, then hand the same error on to the caller
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Close #FileNum
    Err.Raise errNum, errSrc, errDesc

End Function

'* Helper of REGULATORY_REGIONS_TAIRv6: True when the entry may be written out.
'* Without the list filter (frmGB_SQL.chSeqList unchecked) every entry passes;
'* with it, the non-empty locus tag or the non-empty name must be found by
'* MM.InList in vListArray (delimiter "%").
Private Function rrV6_EntryAccepted(ByRef entry As GenBankAnot, ByRef vListArray) As Boolean

    If frmGB_SQL.chSeqList.value = True Then
        rrV6_EntryAccepted = (MM.InList(entry.LocusTag, vListArray, "%") = True And (entry.LocusTag <> "")) _
                          Or (MM.InList(entry.Name, vListArray, "%") = True And (entry.Name <> ""))
    Else
        rrV6_EntryAccepted = True
    End If

End Function

'* Helper of REGULATORY_REGIONS_TAIRv6: reads spanLen bytes starting at the
'* 1-based position startPos from the file open as FileNum. A span that is
'* zero or negative yields "" instead of raising an error in String().
Private Function rrV6_ReadSpan(ByVal FileNum As Integer, ByVal startPos As Long, ByVal spanLen As Long) As String

    Dim buf As String

    buf = ""
    If spanLen > 0 Then
        '* Get fills a string variable up to its current length
        buf = String(spanLen, " ")
        Get #FileNum, startPos, buf
    End If

    rrV6_ReadSpan = buf

End Function

Private Function REGULATORY_REGIONS_TAIRv7(xMapFileObj, xSeqFilePath, xOutSeqArr, vListArray)

Dim FileNum
    FileNum = FreeFile
    Open xSeqFilePath For Binary As FileNum
    'receiving varaible must be STRING!
Dim Lread, lastLread
Dim Lb5pPRE
Dim Lb3pPRE
Dim St5pPRE
Dim St3pPRE
Dim Lb5pCUR
Dim Lb3pCUR
Dim St5pCUR
Dim St3pCUR
'Dim CURR_TSS As Integer
'Dim PREV_TSS As Integer
    '0 = ATG only avaliable, 1 = TSS rooted, first entry (MM method)
Dim Upstm
Dim Dwstm
Dim LbSeqAr As String
    LbSeqAr = ""
Dim LmRNAflag
    LmRNAflag = 0
Dim LCDSflag
    LCDSflag = 0
Dim GenFlag
    GenFlag = 0
Dim GBEntry As MM_Types_vb.GenBankAnot
    GBEntry.Gene = ""
    GBEntry.CDS = ""
    GBEntry.mRNA = ""
    GBEntry.Name = ""
    GBEntry.Orient = Empty
    GBEntry.Occuppied = 0
Dim GBEntryPrev As MM_Types_vb.GenBankAnot
    GBEntryPrev.Gene = ""
    GBEntryPrev.CDS = ""
    GBEntryPrev.mRNA = ""
    GBEntryPrev.Name = ""
    GBEntryPrev.Orient = Empty
    GBEntryPrev.Occuppied = 0
Dim POsi()
Dim ExonArray() As Long
Dim RNA_Array() As Long
Dim i, x
Dim p5UTRstring As String
Dim p3UTRstring As String
Dim exonAScds As String
Dim Previous As String
    Previous = ""
'Dim beginEntry
'    beginEntry = 1

'* regular expression object from VBscript 5.0 and later
Dim reGGExp
    Set reGGExp = New RegExp
Dim r1Matches
Dim r1Match


'* CHECK - before one could callup the form and check at desire!
  If RestrictLIST <> "" Then frmGB_SQL.chSeqList.value = True
   '- otherwise we extract entire data space
  frmGB_SQL.chkTStart.value = True
  frmGB_SQL.optUpstream.value = True
  frmGB_SQL.txtBegin.value = "-1500"
  frmGB_SQL.txtEnd.value = "0"
'* CHECK for now = a simpler solution


Do
Lread = xMapFileObj.readLine

'* this is the definitive annotation point, CDS = gene is present
'* in the the v.4 and v.5 retreived annotations, more data is present
'* and multiple enteries for alternavtive products!

If GenFlag = 1 And (InStr(1, Lread, "     gene  ") Or InStr(1, Lread, "ORIGIN")) Then 'prevents fisrt empty match
  
    GBEntry.Occuppied = 1
          
    If InStr(1, GBEntry.Gene, "     mRNA  ") And (InStr(1, GBEntry.Gene, "     CDS  ")) Then
     If InStr(1, GBEntry.Gene, "     mRNA  ") < (InStr(1, GBEntry.Gene, "     CDS  ")) Then
     GBEntry.mRNA = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "     mRNA  "), InStr(1, GBEntry.Gene, "     CDS  ") - InStr(1, GBEntry.Gene, "     mRNA  ") + 1)
     Else
      GoTo ProcessFinished
     End If
    ElseIf InStr(1, GBEntry.Gene, "     mRNA  ") Then
     GBEntry.mRNA = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "     mRNA  "), Len(GBEntry.Gene) - InStr(1, GBEntry.Gene, "     mRNA  ") + 1)
    End If
    
    If InStr(1, GBEntry.Gene, "     CDS  ") Then GBEntry.CDS = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "     CDS  "), Len(GBEntry.Gene) - InStr(1, GBEntry.Gene, "     CDS  ") + 1)
    
    If frmGB_SQL.chkToggleGene.value Then GBEntry.Gene = Mid(GBEntry.Gene, 1, InStr(1, GBEntry.Gene, "     mRNA  ") + 1)
    
    'NEW!! 08.08.05
    'extra check for unfinished- well, undefined ATG beginnings!!!
    'If InStr(1, GBEntry.mRNA, "<") Or (InStr(1, GBEntry.mRNA, ">") And (GBEntry.Orient)) Then GoTo ProcessFinished
    
  '* give the gene name version 3.3.4 for Atv.4
  '  If InStr(1, GBEntry.Gene, "gene=") Then
  '  'GBEntry.Name = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "gene=") + 6, InStr(InStr(1, GBEntry.Gene, "gene=") + 6, GBEntry.Gene, vbCr) - (InStr(1, GBEntry.Gene, "gene=") + 6) - 1)
  '
  '      reGGExp.Pattern = "gene=\""([ \S\w]+)\"""
  '      reGGExp.Global = True
  '      reGGExp.IgnoreCase = True
  '      Set r1Matches = reGGExp.Execute(GBEntry.Gene)
  '
  '           ' MsgBox GBEntry.Gene
  '            'MsgBox r1Matches.Count
  '
  '      GBEntry.Name = r1Matches.Item(0).SubMatches(0)
  '
  '
  '  End If
    
  
  '* give the gene name version 3.3.4 for Atv.5
  '  If InStr(1, GBEntry.Gene, "synonym: ") Then
  '  'GBEntry.Name = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "synonym: ") + 9, InStr(InStr(1, GBEntry.Gene, "synonym: ") + 9, GBEntry.Gene, ";") - (InStr(1, GBEntry.Gene, "synonym: ") + 9))
  '
  '      reGGExp.Pattern = "synonym: ([ \S\w]+)\"""
  '      reGGExp.Global = True
  '      reGGExp.IgnoreCase = True
  '      Set r1Matches = reGGExp.Execute(Replace(Replace(GBEntry.Gene, vbCr, " "), "  ", ""))
  '
  '      MsgBox Replace(Replace(GBEntry.Gene, vbCr, " "), "  ", "")
  '      'MsgBox r1Matches.Count
  '
  '      GBEntry.Name = r1Matches.Item(0).SubMatches(0)
  '
  '  End If
   
  '* give the gene name version 3.3.4 for Atv.4
  '  If InStr(1, GBEntry.Gene, "locus_tag: ") Then
  '  'GBEntry.LocusTag = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "locus_tag: ") + 11, 9)
  '
  '      reGGExp.Pattern = "locus_tag: \""([ \S\w]+)\"""
  '      reGGExp.Global = True
  '      reGGExp.IgnoreCase = True
  '      Set r1Matches = reGGExp.Execute(GBEntry.Gene)
  '
  '      GBEntry.Name = r1Matches.Item(0).SubMatches(0)
  '
  '  End If
    
    'xOutFileObj.write vbCr & GBEntry.Gene & vbCr '* control that the full entries are caught
   
     '* give the gene name version 3.3.4 for Atv.5
    If InStr(1, GBEntry.Gene, "locus_tag=") Then
    'GBEntry.LocusTag = Mid(GBEntry.Gene, InStr(1, GBEntry.Gene, "locus_tag=") + 11, 9)
    
        reGGExp.Pattern = "locus_tag=\""([ \S\w]+)\"""
        reGGExp.Global = True
        reGGExp.IgnoreCase = True
        Set r1Matches = reGGExp.Execute(GBEntry.Gene)
        
        'If InStr(1, GBEntry.Gene, "AT1G01020") > 0 Then MsgBox GBEntry.Gene
        
        GBEntry.Name = r1Matches.Item(0).SubMatches(0)
        
        GBEntry.LocusTag = GBEntry.Name
        'MsgBox GBEntry.Name
    
    End If
         
       
    
    '*debugging contol message
 'MsgBox GBEntry.LocusTag & "-locustag<>name-" & GBEntry.Name
    
    
End If



    




If GBEntry.Occuppied Then

    '* check for "redundant" enteries !! (alternative transcripts)
    ' this might be cheap skip of the beginning
    
    If GBEntry.LocusTag = GBEntryPrev.LocusTag And GBEntry.Name = GBEntryPrev.Name Then
     GoTo ProcessFinished
    End If
   
     
 '* in the curr gene
 '*get first info *(current entry)*
    '* it makes more sense to convert everything to the gene borders. new v.3.5.0
    
   If InStr(1, GBEntry.Gene, "     CDS  ") = 0 Then 'scan CDS encoding genes only
    
    'If frmGB_SQL.chkTO_NEXT_GENE.value Or frmGB_SQL.chkNO_OVERLAP.value Then
    
     'toggle GENE annot - watch out for CDS for readout
     'meaning: skip rRNA and tranposon, or other RNA genes
        RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
        ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
        Lb5pCUR = RNA_Array(1)
        Lb3pCUR = RNA_Array(UBound(RNA_Array))
        St5pCUR = ExonArray(1)
        St3pCUR = ExonArray(UBound(ExonArray))
     'so you realize that if I dont do that, i should get all genes
     'which would be even more interesting - could really start dividing
     'up promoter structure - might be really frutiful!!!!!!!!!!
    'Else
    '  GoTo Not_CDS_Gene 'silence this - and all genes will give something out
    'End If
   
   Else 'the CDS is present - handel as before
   
    If frmGB_SQL.chkToggleGene.value Then
    
        RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
        ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.Gene, "gene  "))
    
    Else
            
     If GBEntry.mRNA <> "" Then
      RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.mRNA, "mRNA  "))
      Lb5pCUR = RNA_Array(1)
      Lb3pCUR = RNA_Array(UBound(RNA_Array))
     End If 'if mRNA is present

     '* is REQUIRED by this script
     ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntry.CDS, "CDS  "))
     St5pCUR = ExonArray(1)
     St3pCUR = ExonArray(UBound(ExonArray))

     If GBEntry.mRNA = "" Then  'just in case!
     'CURR_TSS = 0
     RNA_Array = ExonArray
     Lb5pCUR = St5pCUR
     Lb3pCUR = St3pCUR
     End If
    
    End If  '* end of toggle-gene
    
End If 'Gene entry CDS is present or not, skipped if the genewatch is on or not
       
'MsgBox "next break"
    
    
 '* if the prev gene has already been logged - we can ?
 If (GBEntryPrev.Gene <> "") Then
   If (InStr(1, GBEntryPrev.Gene, "     CDS  ") = 0) Then    'scan CDS encoding genes only
    
    'If frmGB_SQL.chkTO_NEXT_GENE.value Or frmGB_SQL.chkNO_OVERLAP.value Then
    
     'toggle GENE annot - watch out for CDS for readout
     'meaning: skip rRNA and tranposon, or other RNA genes
        RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.Gene, "gene  "))
        ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.Gene, "gene  "))
        Lb5pPRE = RNA_Array(1)
        Lb3pPRE = RNA_Array(UBound(RNA_Array))
        St5pPRE = ExonArray(1)
        St3pPRE = ExonArray(UBound(ExonArray))
     'so you realize that if I dont do that, i should get all genes
     'which would be even more interesting - could really start dividing
     'up promoter structure - might be really frutiful!!!!!!!!!!
   Else
    
    If GBEntryPrev.mRNA <> "" Then
     If frmGB_SQL.chkToggleGene.value Then
        RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.Gene, "gene  "))
        Else
        RNA_Array = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.mRNA, "mRNA  "))
     End If
     
     Lb5pPRE = RNA_Array(1)
     Lb3pPRE = RNA_Array(UBound(RNA_Array))
    End If 'if mRNA is present

   '*
   If GBEntryPrev.CDS <> "" Then
    ExonArray = gbSQL_BC.SGBA(gbSQL_BC.prePARSER(GBEntryPrev.CDS, "CDS  "))
    St5pPRE = ExonArray(1)
    St3pPRE = ExonArray(UBound(ExonArray))

     If GBEntryPrev.mRNA = "" Then  'just in case!
     'PREV_TSS = 0
     RNA_Array = ExonArray
     Lb5pPRE = St5pPRE
     Lb3pPRE = St3pPRE
     End If
   End If
  
 'End If 'NEXT or TO_GENE check
 End If 'CDS = 0 ?
End If 'prev entry has been found !
 '*get first info (current entry)


'MsgBox "next break 2"

'#record the postions of the gene, and orientations
'* check for beginning, if the gene is oriented 5'->3' then we extract the promoter
'* DONT FORGET THE vListArray comparisons

If GBEntry.Gene <> "" And GBEntryPrev.Gene = "" And GBEntry.Orient = 0 Then
'beginEntry = 0 is sense...
   '* for this version I made 2 new check boxes
   ' chkNO_OVERLAP    and    chkTO_NEXT_GENE  (only toNEXTgene is important)
   
   ' turn this off to get all genes - or add checks for gene types
   ' other options is to print out the gene type etc.
   'MsgBox "stop"
   If GBEntry.CDS = "" Then GoTo ProcessFinished
   
   '* CODE CHECK FOR NAME LIST ACCEPTANCE
    If frmGB_SQL.chSeqList.value = True Then
    
     If (MM.InList(GBEntry.LocusTag, vListArray, "%") = True And (GBEntry.LocusTag <> "")) Or (MM.InList(GBEntry.Name, vListArray, "%") = True And (GBEntry.Name <> "")) Then
      'GoTo isGOOD_1
     Else
      GoTo ProcessFinished
     End If
     
'isGOOD_1:
    End If
   '* END CODE CHECK FOR NAME LIST ACCEPTANCE
   
  If frmGB_SQL.chkTStart.value Then
    
    
    '* take only genes with a 5'UTR
    If frmGB_SQL.chkONLYw5pUTRs.value = True Then
     If Lb3pCUR = St3pCUR Then GoTo ProcessFinished
    End If
    
    
        Dwstm = Lb5pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
        
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Upstm = 1
        Else
         Upstm = Lb5pCUR + (CLng(Val(frmGB_SQL.txtBegin.value)))
        End If
        
    ElseIf frmGB_SQL.chkTstop.value Then
        
        Upstm = Lb3pCUR + CLng(Val(frmGB_SQL.txtBegin.value))
        Dwstm = Lb3pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
    
    ElseIf frmGB_SQL.chkATG.value Then
   
        Dwstm = St5pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
        
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Upstm = 1
        Else
         Upstm = St5pCUR + CLng(Val(frmGB_SQL.txtBegin.value))
        End If
        
    ElseIf frmGB_SQL.chkSTOP.value Then
   
        Upstm = St3pCUR + CLng(Val(frmGB_SQL.txtBegin.value))
        Dwstm = St3pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
    
    End If
   
   
  '****************************************************************
  If frmGB_SQL.optUpstream.value Then
        
   If Upstm < 1 Then Upstm = 1
   LbSeqAr = String((Dwstm - Upstm), " ")
   Get #FileNum, Upstm, LbSeqAr

   'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntry.LocusTag, GBEntry.Name, "rglUp")

  End If
 
 '****************************************************************
 If frmGB_SQL.optDownstream.value Then

    If Dwstm > LOF(FileNum) Then Dwstm = LOF(FileNum)
    LbSeqAr = String(Dwstm - Upstm, " ")
    Get #FileNum, Upstm + 1, LbSeqAr

'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntry.LocusTag, GBEntry.Name, "rglDwn")
   
 End If
  
 If LbSeqAr <> "" Then
   'xOutFileObj.write " size:=" & str(Dwstm - Upstm) + vbNewLine
   'xOutFileObj.write LbSeqAr + vbNewLine
   
   '*orientation is sense, write out current gene
   MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntry.LocusTag) + "%"
   ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
   xOutSeqArr(UBound(xOutSeqArr)) = LbSeqAr
      
 End If


 

ElseIf GBEntry.Gene <> "" And GBEntryPrev.Gene <> "" Then
 'we have an intergenic region between genes!
 '---------------------------------------------------------------

  '* - - - - - - - - - - - - -
   If GBEntryPrev.Orient = 1 Then  '* output the previous ANTISENSE orientated sequence
    
   ' turn this off to get all genes - or add checks for gene types
   ' other options is to print out the gene type etc.
   If GBEntryPrev.CDS = "" Then GoTo ProcessNextinLine
   
    '* CODE CHECK FOR NAME LIST ACCEPTANCE
    If frmGB_SQL.chSeqList.value = True Then
    
     If (MM.InList(GBEntryPrev.LocusTag, vListArray, "%") = True And (GBEntryPrev.LocusTag <> "")) Or (MM.InList(GBEntryPrev.Name, vListArray, "%") = True And (GBEntryPrev.Name <> "")) Then
      'GoTo isGOOD_2
     Else
      GoTo ProcessFinished
     End If
     
'isGOOD_2:
    End If
   '* END CODE CHECK FOR NAME LIST ACCEPTANCE
    
    'MsgBox (Lb5pCUR > Lb3pPRE) And (St5pCUR > St3pPRE)
    
    '* OUTPUT -this forces non-overlapping genes - which is not absolute
    If (Lb5pCUR > Lb3pPRE) And (St5pCUR > St3pPRE) Then   'use the mNRA (transcription positions)
   
    If frmGB_SQL.chkTStart.value Then
    
    
    '* take only genes with a 5'UTR
    If frmGB_SQL.chkONLYw5pUTRs.value = True Then
     If Lb3pPRE = St3pPRE Then GoTo ProcessFinished
    End If
    
    
    
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = Lb5pCUR
         Upstm = Lb3pPRE
        Else
         
         
         Dwstm = Lb3pPRE + -(CLng(Val(frmGB_SQL.txtBegin.value)))
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
          If Dwstm > Lb5pCUR Then Dwstm = Lb5pCUR
         End If

                 
        Upstm = Lb3pPRE + -(CLng(Val(frmGB_SQL.txtEnd.value)))
        End If
    
    ElseIf frmGB_SQL.chkTstop.value Then
               
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = Lb5pCUR
         Upstm = Lb5pPRE
        Else
         
         
         Dwstm = Lb5pPRE + -(CLng(Val(frmGB_SQL.txtBegin.value)))
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
          If Dwstm > Lb5pCUR Then Dwstm = Lb5pCUR
         End If
                         
        Upstm = Lb5pPRE + -(CLng(Val(frmGB_SQL.txtEnd.value)))
        End If
    
    ElseIf frmGB_SQL.chkATG.value Then
          
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = St5pCUR
         Upstm = St3pPRE
        Else
         
         Dwstm = St3pPRE + -(CLng(Val(frmGB_SQL.txtBegin.value)))
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next ATG/STOP
          If Dwstm > St5pCUR Then Dwstm = St5pCUR
         End If
                         
        Upstm = St3pPRE + -(CLng(Val(frmGB_SQL.txtEnd.value)))
        End If
    
    ElseIf frmGB_SQL.chkSTOP.value Then
   
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = St5pCUR
         Upstm = St5pPRE
        Else
                  
         Dwstm = St5pPRE + -(CLng(Val(frmGB_SQL.txtBegin.value)))
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next ATG/STOP
          If Dwstm > St5pCUR Then Dwstm = St5pCUR
         End If
                          
        Upstm = St5pPRE + -(CLng(Val(frmGB_SQL.txtEnd.value)))
        End If
    
    End If
   
  '****************************************************************
  If frmGB_SQL.optUpstream.value Then

   If Dwstm > LOF(FileNum) Then Dwstm = LOF(FileNum)
   LbSeqAr = String((Dwstm - Upstm), " ")
   Get #FileNum, Upstm + 1, LbSeqAr

     'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntryPrev.LocusTag, GBEntryPrev.Name, "rglUp")
   
    End If
 
 '****************************************************************
 If frmGB_SQL.optDownstream.value Then

   If Upstm < 1 Then Upstm = 1
   LbSeqAr = String(Dwstm - Upstm, " ")
   Get #FileNum, Upstm, LbSeqAr

    'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntryPrev.LocusTag, GBEntryPrev.Name, "rglDwn")
   
 End If
  
 If LbSeqAr <> "" Then
   'xOutFileObj.write " size:=" & str(Dwstm - Upstm) + vbNewLine
   'xOutFileObj.write MM.RevComp(LbSeqAr) + vbNewLine
   
      '*orientation is sense, write out current gene
   MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntryPrev.LocusTag) + "%"
   ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
   xOutSeqArr(UBound(xOutSeqArr)) = MM.RevComp(LbSeqAr)
   
   
 End If
   '****************************************************************

    End If '-the previous is upstream of the current gene
   End If 'previous anstisense oriented


ProcessNextinLine:
   '* - - - - - - - - - - - -
   If GBEntry.Orient = 0 Then

   ' turn this off to get all genes - or add checks for gene types
   ' other options is to print out the gene type etc.
   If GBEntry.CDS = "" Then GoTo ProcessFinished

       '* check if we are looking for particular sequences
    If frmGB_SQL.chSeqList.value = True Then
    
     If (MM.InList(GBEntry.LocusTag, vListArray, "%") = True And (GBEntry.LocusTag <> "")) Or (MM.InList(GBEntry.Name, vListArray, "%") = True And (GBEntry.Name <> "")) Then
      'GoTo isGOOD_3
     Else
      GoTo ProcessFinished
     End If
     
'isGOOD:
    End If

    '* recall that this forces non-overlapping genes for output
    If (Lb5pCUR > Lb3pPRE) And (St5pCUR > St3pPRE) Then     'use the mNRA (transcription positions)
   
       If frmGB_SQL.chkTStart.value Then
           
    '* take only genes with a 5'UTR
    If frmGB_SQL.chkONLYw5pUTRs.value = True Then
     If Lb3pCUR = St3pCUR Then GoTo ProcessFinished
    End If
                      
           
           
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = Lb5pCUR
         Upstm = Lb3pPRE
        Else
        
         Upstm = Lb5pCUR + (CLng(frmGB_SQL.txtBegin.value))
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
          If Upstm < Lb3pPRE Then Upstm = Lb3pPRE
         End If
                         
        Dwstm = Lb5pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
        End If
    
    ElseIf frmGB_SQL.chkTstop.value Then
        
       
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = Lb3pCUR
         Upstm = Lb3pPRE
        Else
         
         Upstm = Lb3pCUR + CLng(frmGB_SQL.txtBegin.value)
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
          If Upstm < Lb3pPRE Then Upstm = Lb3pPRE
         End If
                         
        Dwstm = Lb3pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
        End If
    
    ElseIf frmGB_SQL.chkATG.value Then
          
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = St5pCUR
         Upstm = St3pPRE
        Else
         
         Upstm = St5pCUR + CLng(frmGB_SQL.txtBegin.value)
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
          If Upstm < St3pPRE Then Upstm = St3pPRE
         End If
                         
        Dwstm = St5pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
        End If
        
    
    ElseIf frmGB_SQL.chkSTOP.value Then
           
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = St3pCUR
         Upstm = St3pPRE
        Else
        
         Upstm = St3pCUR + CLng(frmGB_SQL.txtBegin.value)
         If frmGB_SQL.chkNO_OVERLAP.value Then '* UNTIL next TSS/begin or END
          If Upstm < St3pPRE Then Upstm = St3pPRE
         End If
                         
        Dwstm = St3pCUR + CLng(Val(frmGB_SQL.txtEnd.value))
        End If
    
    End If

   
  '****************************************************************
  If frmGB_SQL.optUpstream.value Then

   If Upstm < 1 Then Upstm = 1
   LbSeqAr = String((Dwstm - Upstm), " ")
   Get #FileNum, Upstm, LbSeqAr

'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntry.LocusTag, GBEntry.Name, "rglUp")

  End If
 
 '****************************************************************
 If frmGB_SQL.optDownstream.value Then

   If Dwstm > LOF(FileNum) Then Dwstm = LOF(FileNum)
   LbSeqAr = String(Dwstm - Upstm, " ")
   Get #FileNum, Upstm + 1, LbSeqAr

'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntry.LocusTag, GBEntry.Name, "rglDwn")
   
 End If
  
 If LbSeqAr <> "" Then
   'xOutFileObj.write " size:=" & str(Dwstm - Upstm) + vbNewLine
   'xOutFileObj.write LbSeqAr + vbNewLine
   
      '*orientation is sense, write out current gene
   MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntry.LocusTag) + "%"
   ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
   xOutSeqArr(UBound(xOutSeqArr)) = LbSeqAr
   
 End If
   
   
   
   
    GoTo ProcessFinished
    End If '-frmGB_SQL.chkmRNA.Value
   End If 'current sense oriented, we do antisense on a second pass

End If
'get last on exit


'-- reset values
'* EXIT ***********************
ProcessFinished:

If InStr(1, Lread, "ORIGIN") < 1 Then

 'Previous = GBEntry.LocusTag
 GBEntryPrev = GBEntry
 
 GenFlag = 0
 LbSeqAr = ""
 GBEntry.Gene = ""
 GBEntry.CDS = ""
 GBEntry.mRNA = ""
 GBEntry.Name = ""
 GBEntry.LocusTag = ""
 GBEntry.Orient = Empty
 GBEntry.Occuppied = 0
 
End If 'don't throw away the last objects!
 
End If '- occupp ied -----------------------------------------------------------------


If GenFlag = 1 Then
 GBEntry.Gene = GBEntry.Gene + Lread + vbCr
  'MsgBox GBEntryPrev.Gene
End If

If InStr(1, Lread, "     gene  ") Then 'using the previous default below for CDS exit ("/")
 GBEntry.Gene = Lread + vbCr           'this entry would be one ahead of the CDS anno
 GenFlag = 1
 If InStr(1, Lread, "complement") Then
 GBEntry.Orient = 1
 Else
 GBEntry.Orient = 0
 End If
End If 'catch gene annotation flag

'---#MAIN LOOP of GenBank file
Loop Until xMapFileObj.atEndofStream Or InStr(1, Lread, "ORIGIN      ") >= 1


'* LAST GENE CALL -
'* only called when the last one is antisense
If GBEntryPrev.Gene <> "" And GBEntry.Orient = 1 Then

   ' turn this off to get all genes - or add checks for gene types
   ' other options is to print out the gene type etc.
   'If GBEntry.CDS = "" Then Exit Function

  '* END CODE CHECK FOR NAME LIST ACCEPTANCE - sense the pairs are handeled in the
  '* check if we are looking for particular sequences
    If frmGB_SQL.chSeqList.value = True Then
    
     If (MM.InList(GBEntry.LocusTag, vListArray, "%") = True And (GBEntry.LocusTag <> "")) Or (MM.InList(GBEntry.Name, vListArray, "%") = True And (GBEntry.Name <> "")) Then
      'GoTo isGOOD_3
     Else
      GoTo ProcessFinished
     End If
     
'isGOOD:
    End If
  '- the next IF-THEN statement
  
  
    If frmGB_SQL.chkTStart.value Then
    
    
    '* take only genes with a 5'UTR
    If frmGB_SQL.chkONLYw5pUTRs.value = True Then
     If Lb3pCUR = St3pCUR Then GoTo FINALEXIT
    End If
    
    
    
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = LOF(FileNum)
        Else
         Dwstm = Lb3pCUR + -(CLng(Val(frmGB_SQL.txtBegin.value)))
        End If
        
        Upstm = Lb3pCUR + -(CLng(Val(frmGB_SQL.txtEnd.value)))
   
    ElseIf frmGB_SQL.chkTstop.value Then
        
        Dwstm = Lb5pCUR + -(CLng(Val(frmGB_SQL.txtBegin.value)))
        Upstm = Lb5pCUR + -(CLng(Val(frmGB_SQL.txtEnd.value)))
   
    ElseIf frmGB_SQL.chkATG.value Then
   
       
        If frmGB_SQL.chkTO_NEXT_GENE.value Then
         Dwstm = LOF(FileNum)
        Else
         Dwstm = St3pCUR + -(CLng(Val(frmGB_SQL.txtBegin.value)))
        End If
        
        Upstm = St3pCUR + -(CLng(Val(frmGB_SQL.txtEnd.value)))
    
    ElseIf frmGB_SQL.chkSTOP.value Then
   
        Dwstm = St5pCUR + -(CLng(Val(frmGB_SQL.txtBegin.value)))
        Upstm = St5pCUR + -(CLng(Val(frmGB_SQL.txtEnd.value)))

    End If
   
   
   
  '****************************************************************
  If frmGB_SQL.optUpstream.value Then

   If Dwstm > LOF(FileNum) Then Dwstm = LOF(FileNum)
   LbSeqAr = String((Dwstm - Upstm), " ")
   Get #FileNum, Upstm + 1, LbSeqAr

'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntry.LocusTag, GBEntry.Name, "rglUp")

  End If
 
 '****************************************************************
 If frmGB_SQL.optDownstream.value Then

  If Upstm < 1 Then Upstm = 1
  LbSeqAr = String(Dwstm - Upstm, " ")
  Get #FileNum, Upstm, LbSeqAr

'xOutFileObj.write gbSQL_BC.FASTA_GeneNameString(GBEntry.LocusTag, GBEntry.Name, "rglDwn")
   
 End If
  
 If LbSeqAr <> "" Then
   'xOutFileObj.write " size:=" & str(Dwstm - Upstm) + vbNewLine
   'xOutFileObj.write MM.RevComp(LbSeqAr) + vbNewLine
   
      '*orientation is sense, write out current gene
   MASTERgeneLIST = MASTERgeneLIST + UCase(GBEntry.LocusTag) + "%"
   ReDim Preserve xOutSeqArr(UBound(xOutSeqArr) + 1)
   xOutSeqArr(UBound(xOutSeqArr)) = LbSeqAr
   
 End If

'*OUTPUT (end, final)
End If

FINALEXIT:
Close FileNum
REGULATORY_REGIONS_TAIRv7 = ""



End Function



